Segmenting Consumers of Bath Soap

library(tidyverse)
library(factoextra)
library(ISLR)
library(GGally)
library(viridis)
library(flexclust)
library(dplyr)
library(ggplot2)
# Load the raw panel data.
# NOTE(review): the path is tied to the author's Desktop through setwd();
# kept as-is because later steps may rely on the working directory.
setwd("~/Desktop")
BathSoap <- read.csv("BathSoap.csv")

# Strip literal "%" signs and coerce every column to numeric.
BathSoap[] <- lapply(
  BathSoap,
  function(x) as.numeric(sub("%", "", x, fixed = TRUE))
)

# Columns 20:46 are treated as percentage shares: each is converted to an
# absolute volume (share / 100 * Total.Volume). across() replaces the
# deprecated mutate_at()/funs() pair and reads Total.Volume from the piped
# data instead of reaching back to the global object.
BathSoap <- BathSoap %>%
  mutate(across(20:46, ~ .x / 100 * Total.Volume))
head(BathSoap)
##   Member.id SEC FEH MT SEX AGE EDU HS CHILD CS Affluence.Index
## 1   1010010   4   3 10   1   4   4  2     4  1               2
## 2   1010020   3   2 10   2   2   4  4     2  1              19
## 3   1014020   2   3 10   2   4   5  6     4  1              23
## 4   1014030   4   0  0   0   4   0  0     5  0               0
## 5   1014190   4   1 10   2   3   4  4     3  1              10
## 6   1017020   4   3 10   2   3   4  5     2  1              13
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans  Value
## 1             3         17         8025            24  818.0
## 2             5         25        13975            40 1681.5
## 3             5         37        23100            63 1950.0
## 4             2          4         1500             4  114.0
## 5             3          6         8300            13  591.0
## 6             3         26        18175            41 1705.5
##   Trans...Brand.Runs Vol.Tran Avg..Price Pur.Vol.No.Promo....
## 1               1.41   334.38      10.19              8025.00
## 2               1.60   349.38      12.03             12437.75
## 3               1.70   366.67       8.44             21714.00
## 4               1.00   375.00       7.60              1500.00
## 5               2.17   638.46       7.12              5063.00
## 6               1.58   443.29       9.38             18175.00
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144 Br..Cd..55
## 1               0.0                   0.0          3049.5    1043.25
## 2            1397.5                 279.5           279.5    1118.00
## 3             462.0                 924.0           693.0   12705.00
## 4               0.0                   0.0           600.0     900.00
## 5            1162.0                1992.0           415.0    1162.00
## 6               0.0                   0.0          1454.0    1272.25
##   Br..Cd..272 Br..Cd..286 Br..Cd..24 Br..Cd..481 Br..Cd..352 Br..Cd..5
## 1           0           0          0         0.0           0       0.0
## 2           0           0          0       838.5           0    1956.5
## 3           0         693          0         0.0           0     462.0
## 4           0           0          0         0.0           0       0.0
## 5           0           0          0         0.0           0       0.0
## 6           0           0          0         0.0           0       0.0
##   Others.999 Pr.Cat.1 Pr.Cat.2 Pr.Cat.3 Pr.Cat.4 PropCat.5 PropCat.6
## 1   3948.300  1845.75  4494.00  1043.25   561.75   4012.50      0.00
## 2   9768.525  4052.75  7686.25  1257.75   838.50   6428.50   4891.25
## 3   8754.900  2772.00  7392.00 12936.00     0.00   5544.00   2772.00
## 4      0.000     0.00   600.00   900.00     0.00    600.00      0.00
## 5   6698.100     0.00   415.00  1162.00  6723.00   6723.00      0.00
## 6  15575.975  3998.50  8178.75  1272.25  4907.25   8905.75   1817.50
##   PropCat.7 PropCat.8 PropCat.9 PropCat.10 PropCat.11 PropCat.12
## 1      0.00      0.00      0.00          0        0.0     240.75
## 2    419.25    279.50    139.75          0      838.5       0.00
## 3    693.00    231.00    231.00          0        0.0     462.00
## 4      0.00      0.00      0.00          0        0.0       0.00
## 5      0.00    415.00      0.00          0        0.0       0.00
## 6      0.00    181.75   1272.25          0        0.0       0.00
##   PropCat.13 PropCat.14 PropCat.15
## 1          0    1043.25    2728.50
## 2          0    1118.00       0.00
## 3          0   12936.00       0.00
## 4          0     900.00       0.00
## 5          0    1162.00       0.00
## 6          0    1272.25    4907.25
# Z-score every purchase column (12 onwards) and leave the first 11 columns
# (Member.id and the demographic codes) in their original units.
demographics <- BathSoap[, seq_len(11)]
behaviour_z <- scale(BathSoap[, -seq_len(11)])
BS <- cbind(demographics, behaviour_z)
head(BS)
##   Member.id SEC FEH MT SEX AGE EDU HS CHILD CS Affluence.Index
## 1   1010010   4   3 10   1   4   4  2     4  1               2
## 2   1010020   3   2 10   2   2   4  4     2  1              19
## 3   1014020   2   3 10   2   4   5  6     4  1              23
## 4   1014030   4   0  0   0   4   0  0     5  0               0
## 5   1014190   4   1 10   2   3   4  4     3  1              10
## 6   1017020   4   3 10   2   3   4  5     2  1              13
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans      Value
## 1    -0.4030277  0.1200727   -0.5005898    -0.4104681 -0.5881031
## 2     0.8630280  0.8895639    0.2651391     0.5076339  0.3896410
## 3     0.8630280  2.0438006    1.4394712     1.8274054  0.6936645
## 4    -1.0360556 -1.1303505   -1.3403176    -1.5580955 -1.3852447
## 5    -0.4030277 -0.9379777   -0.4651989    -1.0416632 -0.8451360
## 6    -0.4030277  0.9857502    0.8056536     0.5650152  0.4168163
##   Trans...Brand.Runs   Vol.Tran  Avg..Price Pur.Vol.No.Promo....
## 1         -0.4636969 -0.3242918 -0.43944366           -0.3943558
## 2         -0.3907514 -0.2639930  0.05217678            0.1983882
## 3         -0.3523590 -0.1944886 -0.90701745            1.4444233
## 4         -0.6211057 -0.1610026 -1.13145287           -1.2708284
## 5         -0.1719147  0.8980852 -1.25970168           -0.7922274
## 6         -0.3984298  0.1135176 -0.65586353            0.9690461
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144 Br..Cd..55
## 1        -0.5574329            -0.5116939       0.2226321 -0.1754393
## 2         0.7686897            -0.1252184      -0.4805832 -0.1578175
## 3        -0.1190296             0.7659568      -0.3756086  2.5737501
## 4        -0.5574329            -0.5116939      -0.3992184 -0.2092097
## 5         0.5452179             2.2427218      -0.4461840 -0.1474447
## 6        -0.5574329            -0.5116939      -0.1824148 -0.1214539
##   Br..Cd..272 Br..Cd..286 Br..Cd..24 Br..Cd..481 Br..Cd..352  Br..Cd..5
## 1  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
## 2  -0.3448178  -0.2253619 -0.2371511   0.4047639  -0.2549821  2.6176765
## 3  -0.3448178   0.1428499 -0.2371511  -0.2538919  -0.2549821  0.3943628
## 4  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
## 5  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
## 6  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
##   Others.999      Pr.Cat.1   Pr.Cat.2   Pr.Cat.3   Pr.Cat.4    PropCat.5
## 1 -0.3870628 -0.2794416143 -0.2520866 -0.1988988 -0.1925159 -0.246319991
## 2  0.6565013  0.3854838369  0.2625352 -0.1496659 -0.1056810  0.141979507
## 3  0.4747587 -0.0003808082  0.2150992  2.5307700 -0.3687742 -0.000177325
## 4 -1.0949914 -0.8355295851 -0.8798374 -0.2317780 -0.3687742 -0.794776958
## 5  0.1059753 -0.8355295851 -0.9096612 -0.1716428  1.7406778  0.189311544
## 6  1.6977747  0.3691393848  0.3419310 -0.1463378  1.1709563  0.540123103
##    PropCat.6  PropCat.7  PropCat.8  PropCat.9 PropCat.10 PropCat.11
## 1 -0.4980207 -0.4173594 -0.5069920 -0.4317488 -0.2850019 -0.2651424
## 2  1.6247065 -0.2567103 -0.3267324 -0.2787491 -0.2850019  0.3729320
## 3  0.7049846 -0.1518142 -0.3580118 -0.1788477 -0.2850019 -0.2651424
## 4 -0.4980207 -0.4173594 -0.5069920 -0.4317488 -0.2850019 -0.2651424
## 5 -0.4980207 -0.4173594 -0.2393435 -0.4317488 -0.2850019 -0.2651424
## 6  0.2907463 -0.4173594 -0.3897749  0.9611232 -0.2850019 -0.2651424
##   PropCat.12 PropCat.13 PropCat.14 PropCat.15
## 1  0.8905060 -0.2536688 -0.1912455  2.1061267
## 2 -0.2907978 -0.2536688 -0.1739337 -0.2505867
## 3  1.9761280 -0.2536688  2.5630697 -0.2505867
## 4 -0.2907978 -0.2536688 -0.2244217 -0.2505867
## 5 -0.2907978 -0.2536688 -0.1637435 -0.2505867
## 6 -0.2907978 -0.2536688 -0.1382100  3.9879993

Identify clusters of households based on Purchase behavior

# Elbow (WSS) and silhouette diagnostics for choosing k on the
# purchase-behaviour columns. The seed and nstart = 25 (forwarded to
# kmeans) make the curves reproducible and consistent with the final fit,
# instead of depending on one random start per k.
set.seed(120)
fviz_nbclust(BS[, 12:31], kmeans, method = "wss", nstart = 25)

fviz_nbclust(BS[, 12:31], kmeans, method = "silhouette", nstart = 25)

k = 2

# 2-means on the scaled purchase-behaviour columns (12:31); the fixed seed
# makes the 25 random starts reproducible.
set.seed(120)
pk2 <- kmeans(x = BS[, 12:31], centers = 2, nstart = 25)
print(pk2)
## K-means clustering with 2 clusters of sizes 148, 452
## 
## Cluster means:
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans     Value
## 1     0.3839799  0.5256153    1.3353247     0.7871742  1.176034
## 2    -0.1257279 -0.1721041   -0.4372302    -0.2577473 -0.385073
##   Trans...Brand.Runs   Vol.Tran  Avg..Price Pur.Vol.No.Promo....
## 1         0.18290360  0.7673642 -0.27557620            1.2822270
## 2        -0.05988879 -0.2512609  0.09023291           -0.4198442
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144 Br..Cd..55
## 1         0.4821548             0.4661500       0.3593261  0.4911850
## 2        -0.1578737            -0.1526332      -0.1176554 -0.1608305
##   Br..Cd..272 Br..Cd..286  Br..Cd..24 Br..Cd..481 Br..Cd..352   Br..Cd..5
## 1  0.16526060  0.20168930  0.13563777  0.30345639  0.08320394  0.24331419
## 2 -0.05411188 -0.06603986 -0.04441237 -0.09936183 -0.02724377 -0.07966925
##   Others.999
## 1  0.9988818
## 2 -0.3270675
## 
## Clustering vector:
##   [1] 2 1 1 2 2 1 2 2 1 2 1 2 1 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 1 1
##  [36] 2 2 2 2 2 2 2 1 2 1 2 2 2 2 1 1 2 2 1 2 1 1 1 1 2 1 1 1 2 2 2 1 1 2 2
##  [71] 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1 2 1 2 2 2 1 2 2 2 2 1 2 2 2 2 2 1 1 2
## [106] 1 1 2 2 2 2 2 2 1 1 2 1 2 1 2 1 1 2 1 1 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1
## [141] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 1 2 1 2 2 2 2 2 1
## [176] 2 1 2 1 2 2 1 2 2 1 2 1 1 1 2 2 1 2 2 1 2 2 2 1 2 2 1 1 2 2 2 2 1 2 1
## [211] 2 2 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2 1 2 1 2 1 1 2 1 2 2 2 2 2 2
## [246] 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 1 1 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2
## [281] 1 1 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1
## [316] 1 2 2 1 2 1 2 1 1 2 2 1 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 1 2 2 2
## [351] 2 2 2 2 2 2 1 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 1
## [386] 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2
## [421] 1 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [456] 2 2 1 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2
## [491] 2 2 2 2 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2
## [526] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2
## [561] 2 2 2 2 1 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2
## [596] 2 2 1 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 5379.942 4907.679
##  (between_SS / total_SS =  14.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Plot the clusters using the same columns the model was fitted on (12:31).
# Passing the whole BS table would mix Member.id and the demographic
# columns into the projection.
fviz_cluster(pk2, data = BS[, 12:31])

Identify clusters of households based on Basis of purchase

# Elbow (WSS) and silhouette diagnostics for choosing k on the
# basis-of-purchase columns. The seed and nstart = 25 (forwarded to kmeans)
# make the curves reproducible and consistent with the final fit, instead
# of depending on one random start per k.
set.seed(120)
fviz_nbclust(BS[, 32:46], kmeans, method = "wss", nstart = 25)

fviz_nbclust(BS[, 32:46], kmeans, method = "silhouette", nstart = 25)

k = 4

# 4-means on the scaled basis-of-purchase columns (32:46); the fixed seed
# makes the 25 random starts reproducible.
set.seed(120)
bk4 <- kmeans(x = BS[, 32:46], centers = 4, nstart = 25)
print(bk4)
## K-means clustering with 4 clusters of sizes 99, 48, 56, 397
## 
## Cluster means:
##     Pr.Cat.1   Pr.Cat.2   Pr.Cat.3    Pr.Cat.4  PropCat.5   PropCat.6
## 1  0.1661731  1.1324062 -0.1806369  0.77916278  1.4390664 -0.01285879
## 2  1.8559024  0.1231322 -0.3602898 -0.24398568 -0.2408319  0.97459347
## 3 -0.5683449 -0.5400881  2.6903448 -0.08367702 -0.5188230  0.07317085
## 4 -0.1856603 -0.2210923 -0.2908875 -0.15299720 -0.2565581 -0.12494971
##     PropCat.7   PropCat.8   PropCat.9  PropCat.10 PropCat.11 PropCat.12
## 1 -0.09915360  0.07733577  0.93989828 -0.09869787  0.3519956 -0.1030397
## 2  1.15236127  0.02499432  0.05503680  1.09157261 -0.0161106  1.8006345
## 3 -0.34781908 -0.40801906 -0.05306282 -0.25297241 -0.1928943 -0.1256151
## 4 -0.06553971  0.03524710 -0.23355209 -0.07168247 -0.0586201 -0.1742949
##   PropCat.13 PropCat.14  PropCat.15
## 1 -0.2171882 -0.1806510  0.19702711
## 2  1.5052363 -0.3567717 -0.02484209
## 3 -0.1748551  2.6852954 -0.15972750
## 4 -0.1031683 -0.2905971 -0.02359830
## 
## Clustering vector:
##   [1] 4 4 3 4 4 1 4 3 1 4 1 4 1 4 4 4 1 4 1 4 4 3 3 4 4 2 4 1 4 4 3 4 4 3 3
##  [36] 4 4 4 3 4 4 3 3 4 1 4 4 4 4 2 3 1 4 1 3 1 3 1 3 4 4 3 3 4 4 3 3 1 4 4
##  [71] 4 4 3 4 4 4 4 4 2 4 4 3 3 4 4 1 4 1 4 3 2 3 3 4 2 4 3 4 4 4 4 4 1 4 1
## [106] 1 2 4 4 4 3 1 4 1 1 4 4 3 1 4 1 2 1 2 1 3 1 4 4 2 1 2 4 4 4 2 4 4 4 1
## [141] 1 3 4 3 2 3 4 4 4 4 4 4 4 3 4 4 4 4 4 3 3 3 1 2 1 4 1 4 1 4 1 4 4 3 1
## [176] 4 1 3 3 1 4 1 2 4 1 1 1 1 1 4 1 2 4 4 4 4 4 4 2 4 1 1 3 4 3 4 4 1 4 1
## [211] 4 4 4 1 1 4 4 4 3 2 4 1 3 4 4 4 4 4 4 4 3 1 3 2 4 3 3 3 3 4 4 4 4 4 3
## [246] 4 4 4 4 4 4 4 4 1 1 4 1 3 1 4 4 4 4 1 1 4 4 4 4 4 1 1 4 4 4 4 1 4 4 4
## [281] 1 1 4 1 1 4 4 4 4 2 4 4 4 4 4 1 3 1 1 2 4 4 4 2 4 4 4 4 4 2 4 1 1 2 1
## [316] 1 4 4 4 4 3 4 1 3 4 4 2 4 4 4 1 4 4 4 4 4 1 4 4 4 4 4 1 4 4 4 4 4 4 4
## [351] 4 4 2 4 4 4 2 4 2 4 4 4 1 2 2 4 4 4 4 4 4 4 4 4 3 4 4 4 1 4 4 4 1 4 4
## [386] 4 4 1 4 4 2 4 4 4 4 4 4 4 4 4 4 2 1 2 4 4 4 4 2 4 4 4 2 4 4 4 4 4 4 4
## [421] 1 4 4 4 4 4 4 4 4 4 1 1 1 4 2 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 2 4
## [456] 4 1 2 1 4 4 4 4 2 1 3 4 4 4 4 4 4 4 1 4 1 1 4 4 2 4 4 4 2 4 4 4 4 4 4
## [491] 4 4 4 4 2 2 4 4 1 4 3 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 1 4
## [526] 1 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 4 1 4 4 4 4 4 4 4 4 4 4 1 4 4 2
## [561] 4 1 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 4 4 2 4 4 1 4 4 4 4 4
## [596] 4 4 1 4 4
## 
## Within cluster sum of squares by cluster:
## [1] 2252.4953 1891.6807  410.5978 2231.0908
##  (between_SS / total_SS =  24.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Plot the clusters using the same columns the model was fitted on (32:46).
# Passing the whole BS table would mix Member.id, the demographic columns
# and the purchase-behaviour columns into the projection.
fviz_cluster(bk4, data = BS[, 32:46])

Purchase behavior and Basis of purchase, k = 3

# Elbow (WSS) and silhouette diagnostics for choosing k on all scaled
# purchase columns. The seed and nstart = 25 (forwarded to kmeans) make the
# curves reproducible and consistent with the final fits, instead of
# depending on one random start per k.
set.seed(120)
fviz_nbclust(BS[, 12:46], kmeans, method = "wss", nstart = 25)

fviz_nbclust(BS[, 12:46], kmeans, method = "silhouette", nstart = 25)

# 3-means on all scaled purchase columns (12:46); the fixed seed makes the
# 25 random starts reproducible.
set.seed(120)
k3 <- kmeans(x = BS[, 12:46], centers = 3, nstart = 25)
print(k3)
## K-means clustering with 3 clusters of sizes 424, 54, 122
## 
## Cluster means:
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans      Value
## 1   -0.09248577 -0.1321898   -0.4656020   -0.25848864 -0.3708717
## 2   -0.35613679 -0.5924191    0.7172361   -0.01198633 -0.1557435
## 3    0.47906028  0.7216319    1.3006927    0.90365940  1.3578667
##   Trans...Brand.Runs   Vol.Tran  Avg..Price Pur.Vol.No.Promo....
## 1        -0.15865849 -0.2872994  0.18397336           -0.4467837
## 2         1.35361406  0.7513708 -1.35875806            0.6967571
## 3        -0.04773737  0.6659092 -0.03796531            1.2443557
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144 Br..Cd..55
## 1       -0.14768724            -0.1952011     -0.09251371 -0.2952935
## 2       -0.05519063             0.6157772     -0.44555219  2.7091279
## 3        0.53770232             0.4058468      0.51873469 -0.1728562
##   Br..Cd..272 Br..Cd..286  Br..Cd..24 Br..Cd..481 Br..Cd..352   Br..Cd..5
## 1 -0.03609944 -0.05447222 -0.03827278  -0.0913534 -0.01319092 -0.05926465
## 2 -0.27633726 -0.17052912 -0.13783678  -0.1765512 -0.21847185 -0.17453774
## 3  0.24777356  0.26479340  0.19402330   0.3956361  0.14254450  0.28322337
##   Others.999   Pr.Cat.1   Pr.Cat.2   Pr.Cat.3   Pr.Cat.4  PropCat.5
## 1 -0.2982449 -0.1419520 -0.2328618 -0.2977887 -0.1273257 -0.2135347
## 2 -0.4971270 -0.5788665 -0.6174472  2.7013045 -0.1323849 -0.5807933
## 3  1.2565630  0.7495611  1.0825866 -0.1607215  0.5011054  0.9991930
##    PropCat.6   PropCat.7   PropCat.8   PropCat.9  PropCat.10  PropCat.11
## 1 -0.1500091 -0.06716428 -0.05191804 -0.15715382 -0.01087804 -0.09104144
## 2 -0.0746225 -0.35998537 -0.41496499 -0.05220554 -0.25178613 -0.19021844
## 3  0.5543727  0.39276120  0.36410949  0.56928130  0.14925196  0.40060137
##   PropCat.12  PropCat.13 PropCat.14  PropCat.15
## 1 -0.1015529 -0.03311153 -0.2972731 -0.07129654
## 2 -0.1194972 -0.17193607  2.7170638 -0.15636234
## 3  0.4058302  0.19117899 -0.1694891  0.31699425
## 
## Clustering vector:
##   [1] 1 1 2 1 1 3 1 2 3 1 3 1 3 1 1 1 3 1 1 1 2 2 2 1 1 3 1 1 1 1 2 1 1 2 2
##  [36] 1 1 1 2 1 1 2 2 1 3 1 1 1 1 3 2 1 1 3 2 3 2 3 2 1 3 2 2 1 1 2 2 3 1 1
##  [71] 1 1 2 1 1 1 1 1 3 1 1 2 2 1 1 3 1 3 1 2 1 2 2 1 1 1 2 1 1 1 1 1 3 3 1
## [106] 3 3 1 1 1 2 1 1 3 3 1 3 2 3 1 3 3 1 3 3 2 3 1 1 3 3 3 1 1 2 1 1 1 1 3
## [141] 1 2 1 1 3 2 1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 2 3 3 3 1 3 1 3 1 1 1 1 2 3
## [176] 1 3 2 2 1 1 3 1 1 3 1 3 3 3 1 1 3 1 1 1 1 1 1 3 1 1 3 3 1 2 1 1 3 1 3
## [211] 1 1 1 3 3 1 1 1 2 1 1 3 2 1 1 1 1 3 1 1 2 3 2 3 1 2 2 2 2 1 1 1 1 1 2
## [246] 1 1 1 1 1 1 1 1 3 1 1 1 2 3 1 1 1 1 3 3 1 1 1 1 1 3 3 1 1 1 1 3 1 1 1
## [281] 3 3 1 3 3 1 1 1 1 1 1 1 1 1 1 1 2 1 3 1 1 1 1 3 1 1 1 1 1 1 1 3 3 1 3
## [316] 3 1 1 3 1 2 1 3 3 1 1 3 1 1 1 3 1 1 1 1 1 3 1 1 1 1 1 3 1 1 1 3 1 1 1
## [351] 1 1 1 1 1 1 3 1 1 3 1 1 3 3 3 1 1 1 1 1 1 1 1 1 2 1 1 1 3 1 1 1 3 1 3
## [386] 1 1 1 1 1 3 1 1 1 3 1 1 1 1 1 1 3 3 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1
## [421] 3 1 1 1 1 3 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [456] 1 1 3 3 1 1 1 1 1 3 2 1 1 1 1 1 1 1 1 3 3 1 1 1 3 1 1 1 1 1 1 1 1 1 1
## [491] 1 1 1 1 3 1 1 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 3 1
## [526] 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 3 3 3 1 1 1 3 1 1 1 3 1 1 1 1 1 1
## [561] 1 1 1 1 3 1 1 1 1 1 1 3 1 1 3 1 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 1 1 1 1
## [596] 1 1 3 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 7224.622 1215.646 8528.354
##  (between_SS / total_SS =  19.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Plot the clusters using the same columns the model was fitted on (12:46).
# Passing the whole BS table would mix Member.id and the demographic
# columns into the projection.
fviz_cluster(k3, data = BS[, 12:46])

# Cluster centres as a data frame, relabelled with short axis names for the
# 35 clustering variables (same column order as BS[, 12:46]).
k3center <- setNames(
  as.data.frame(k3$centers),
  c(
    "No.Br", "Br.Runs", "Total.Vol", "No.Trans", "Value", "Trans.Br.Runs",
    "Vol.Tran", "Avg.Price", "PVNP", "PVP6", "PVOP", "Br.57.144", "Br.55",
    "Br.272", "Br.286", "Br.24", "Br.481", "Br.352", "Br.5", "Br.999",
    "Pt1", "Pt2", "Pt3", "Pt4", "Pt5", "Pt6", "Pt7", "Pt8", "Pt9", "Pt10",
    "Pt11", "Pt12", "Pt13", "Pt14", "Pt15"
  )
)
# One-column character matrix of cluster ids, prepended as column 1 so the
# parallel-coordinate plots can group on it.
cluster <- matrix(as.character(1:3), ncol = 1)
k3center <- cbind(cluster, k3center)

Purchase summary over the period, k = 3

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the three centre rows and distort the magnitudes.
ggparcoord(
  k3center,
  columns = 2:9,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Purchase within promotion, k = 3

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the three centre rows and distort the magnitudes.
ggparcoord(
  k3center,
  columns = 10:12,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Brandwise purchase, k = 3

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the three centre rows and distort the magnitudes.
ggparcoord(
  k3center,
  columns = 13:21,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Price categorywise purchase, k = 3

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the three centre rows and distort the magnitudes.
ggparcoord(
  k3center,
  columns = 22:25,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Selling propositionwise purchase, k = 3

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the three centre rows and distort the magnitudes.
ggparcoord(
  k3center,
  columns = 26:36,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Purchase behavior and Basis of purchase, k = 2

# 2-means on all scaled purchase columns (12:46); the fixed seed makes the
# 25 random starts reproducible.
set.seed(120)
k2 <- kmeans(x = BS[, 12:46], centers = 2, nstart = 25)
print(k2)
## K-means clustering with 2 clusters of sizes 466, 134
## 
## Cluster means:
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans      Value
## 1    -0.1449263 -0.2041952   -0.3847868    -0.2643057 -0.3750085
## 2     0.5039973  0.7101116    1.3381391     0.9191525  1.3041341
##   Trans...Brand.Runs   Vol.Tran  Avg..Price Pur.Vol.No.Promo....
## 1       -0.001571150 -0.1865315  0.03398412           -0.3661046
## 2        0.005463849  0.6486840 -0.11818358            1.2731696
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144  Br..Cd..55
## 1        -0.1679486            -0.1283127      -0.1297062 -0.03331724
## 2         0.5840601             0.4462216       0.4510677  0.11586443
##   Br..Cd..272 Br..Cd..286  Br..Cd..24 Br..Cd..481 Br..Cd..352   Br..Cd..5
## 1 -0.06196977 -0.06752008 -0.05093196  -0.1011599  -0.0336199 -0.07533559
## 2  0.21550681  0.23480864  0.17712160   0.3517949   0.1169170  0.26198795
##   Others.999   Pr.Cat.1   Pr.Cat.2    Pr.Cat.3   Pr.Cat.4  PropCat.5
## 1 -0.3379028 -0.1938666 -0.2885875 -0.03667022 -0.1239964 -0.2579915
## 2  1.1750949  0.6741928  1.0035952  0.12752481  0.4312115  0.8971943
##    PropCat.6  PropCat.7   PropCat.8  PropCat.9  PropCat.10 PropCat.11
## 1 -0.1622671 -0.0955427 -0.09287433 -0.1610016 -0.03283336 -0.1020414
## 2  0.5643019  0.3322604  0.32298088  0.5599011  0.11418168  0.3548603
##   PropCat.12  PropCat.13  PropCat.14  PropCat.15
## 1 -0.1073432 -0.04865676 -0.03397851 -0.08876794
## 2  0.3732981  0.16920932  0.11816407  0.30870044
## 
## Clustering vector:
##   [1] 1 2 2 1 1 2 1 1 2 1 2 1 2 1 1 1 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 2 2 2 1 1 2 1 1 1 1 1 2 2 1 1
##  [71] 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 2 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 1
## [106] 2 2 1 1 1 1 1 1 2 2 1 2 1 2 1 2 2 1 2 2 2 2 1 1 2 2 2 1 1 1 1 1 1 1 2
## [141] 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 1 2 1 2 1 1 1 1 1 2
## [176] 1 2 1 2 1 1 2 1 1 2 1 2 2 2 1 1 2 1 1 1 1 1 1 2 1 1 2 2 1 1 1 1 2 1 2
## [211] 1 1 2 2 2 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1
## [246] 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 2 1 1 1 1 1 2 2 1 1 1 1 2 1 1 1
## [281] 2 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 2 2 1 2
## [316] 2 1 1 2 1 2 1 2 2 1 1 2 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1
## [351] 1 1 1 1 1 1 2 1 1 2 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2
## [386] 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1
## [421] 2 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [456] 1 1 2 2 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 2 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1
## [491] 1 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1
## [526] 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1
## [561] 1 1 1 1 2 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1
## [596] 1 1 2 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 9329.794 9330.351
##  (between_SS / total_SS =  11.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Plot the clusters using the same columns the model was fitted on (12:46).
# Passing the whole BS table would mix Member.id and the demographic
# columns into the projection.
fviz_cluster(k2, data = BS[, 12:46])

# Cluster centres as a data frame, relabelled with short axis names for the
# 35 clustering variables (same column order as BS[, 12:46]).
k2center <- setNames(
  as.data.frame(k2$centers),
  c(
    "No.Br", "Br.Runs", "Total.Vol", "No.Trans", "Value", "Trans.Br.Runs",
    "Vol.Tran", "Avg.Price", "PVNP", "PVP6", "PVOP", "Br.57.144", "Br.55",
    "Br.272", "Br.286", "Br.24", "Br.481", "Br.352", "Br.5", "Br.999",
    "Pt1", "Pt2", "Pt3", "Pt4", "Pt5", "Pt6", "Pt7", "Pt8", "Pt9", "Pt10",
    "Pt11", "Pt12", "Pt13", "Pt14", "Pt15"
  )
)
# One-column character matrix of cluster ids, prepended as column 1 so the
# parallel-coordinate plots can group on it.
cluster <- matrix(as.character(1:2), ncol = 1)
k2center <- cbind(cluster, k2center)

Purchase summary over the period, k = 2

# The centres are already z-scores, so draw them on their common scale.
# With only two centre rows the default scale = "std" maps every variable
# to +/-0.707, which hides how far apart the clusters actually are.
ggparcoord(
  k2center,
  columns = 2:9,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Purchase within promotion, k = 2

# The centres are already z-scores, so draw them on their common scale.
# With only two centre rows the default scale = "std" maps every variable
# to +/-0.707, which hides how far apart the clusters actually are.
ggparcoord(
  k2center,
  columns = 10:12,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Brandwise purchase, k = 2

# The centres are already z-scores, so draw them on their common scale.
# With only two centre rows the default scale = "std" maps every variable
# to +/-0.707, which hides how far apart the clusters actually are.
ggparcoord(
  k2center,
  columns = 13:21,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Price categorywise purchase, k = 2

# The centres are already z-scores, so draw them on their common scale.
# With only two centre rows the default scale = "std" maps every variable
# to +/-0.707, which hides how far apart the clusters actually are.
ggparcoord(
  k2center,
  columns = 22:25,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Selling propositionwise purchase, k = 2

# The centres are already z-scores, so draw them on their common scale.
# With only two centre rows the default scale = "std" maps every variable
# to +/-0.707, which hides how far apart the clusters actually are.
ggparcoord(
  k2center,
  columns = 26:36,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Purchase behavior and Basis of purchase, k = 4

# 4-means on all scaled purchase columns (12:46); the fixed seed makes the
# 25 random starts reproducible.
set.seed(120)
k4 <- kmeans(x = BS[, 12:46], centers = 4, nstart = 25)
print(k4)
## K-means clustering with 4 clusters of sizes 52, 159, 351, 38
## 
## Cluster means:
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans       Value
## 1    -0.3178125 -0.5550818    0.8179290    0.06954917 -0.08493788
## 2     0.7435888  0.9754662    0.3226669    0.96632396  0.65925598
## 3    -0.2713724 -0.3531863   -0.5144466   -0.46539726 -0.50279058
## 4    -0.1698070 -0.0596440    2.2824841    0.16032557  2.00196217
##   Trans...Brand.Runs   Vol.Tran   Avg..Price Pur.Vol.No.Promo....
## 1         1.35462582  0.7507119 -1.338110210            0.8055906
## 2        -0.20919640 -0.3438857  0.514423676            0.2621991
## 3        -0.11532150 -0.1972378  0.008589846           -0.4838254
## 4         0.08682978  2.2334547 -0.400701833            2.2695356
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144  Br..Cd..55
## 1       -0.03587362             0.5481555     -0.42588824  2.78575343
## 2        0.38892339             0.2020246     -0.11126248 -0.30328607
## 3       -0.21838677            -0.2361182     -0.06576176 -0.27053370
## 4        0.43895702             0.5855657      1.65577105 -0.04419375
##   Br..Cd..272 Br..Cd..286 Br..Cd..24 Br..Cd..481 Br..Cd..352  Br..Cd..5
## 1 -0.27370339 -0.16842017 -0.1381482 -0.17357659 -0.21706761 -0.1699839
## 2  0.22597542  0.01829106  0.4568449  0.03579013  0.01987912  0.4253193
## 3 -0.08119801 -0.04780401 -0.1648062 -0.08663000  0.02897091 -0.1531229
## 4  0.17902592  0.59549419 -0.2002015  0.88796007 -0.05373823 -0.1326441
##   Others.999   Pr.Cat.1   Pr.Cat.2    Pr.Cat.3    Pr.Cat.4   PropCat.5
## 1 -0.4318378 -0.5442678 -0.5914525  2.77808631 -0.07670589 -0.53360466
## 2  0.5919819  0.9322732  0.1553378 -0.30349931 -0.07144406 -0.04596878
## 3 -0.3849787 -0.3535908 -0.2198827 -0.27093927 -0.08855764 -0.18461162
## 4  1.6699460  0.1100221  2.1904122 -0.02906355  1.22189589  2.62776730
##     PropCat.6   PropCat.7  PropCat.8   PropCat.9  PropCat.10  PropCat.11
## 1 -0.05833796 -0.35777868 -0.4114255  0.05546797 -0.25050860 -0.18733674
## 2  0.46501829  0.37789106  0.4044906  0.35105963  0.34084122  0.04807838
## 3 -0.24130847 -0.09717295 -0.1398660 -0.18987208 -0.11606667 -0.08507712
## 4  0.36302474 -0.19401267  0.1624500  0.20900748 -0.01126065  0.84102937
##   PropCat.12 PropCat.13  PropCat.14 PropCat.15
## 1 -0.1129087 -0.1721924  2.79384348 -0.1527383
## 2  0.3570557  0.4759960 -0.30278794  0.3052338
## 3 -0.1672040 -0.1707809 -0.27083693 -0.1020832
## 4  0.2049469 -0.1785600 -0.05454774 -0.1252252
## 
## Clustering vector:
##   [1] 3 2 1 3 3 2 3 1 4 3 2 2 4 3 3 3 2 3 3 3 3 1 1 3 3 2 3 3 3 3 1 3 3 1 1
##  [36] 3 3 3 1 3 3 1 1 3 4 3 3 3 3 2 1 3 3 4 1 4 1 2 1 3 2 1 1 3 2 1 1 2 3 3
##  [71] 3 3 1 3 3 3 3 3 2 3 3 1 1 3 3 3 3 4 3 1 2 1 1 2 2 2 1 2 3 3 3 3 1 2 3
## [106] 2 2 3 2 3 1 3 3 2 4 2 2 3 2 3 2 2 3 2 4 1 4 3 3 2 2 2 3 3 3 2 3 3 3 4
## [141] 3 1 3 3 2 1 3 3 3 3 3 3 3 1 2 2 2 3 3 1 1 1 4 2 4 2 4 3 2 3 3 3 3 1 4
## [176] 3 4 1 1 3 3 4 2 3 2 3 4 2 4 2 3 2 3 3 3 3 2 3 2 3 3 4 4 3 1 3 3 4 3 2
## [211] 3 3 2 3 4 3 3 3 1 2 3 4 1 3 3 3 3 2 3 3 1 4 1 3 3 1 1 1 1 3 2 2 3 3 1
## [246] 3 3 3 3 3 3 2 3 2 3 3 3 1 4 3 2 3 2 3 2 3 3 3 3 2 4 4 3 3 3 3 2 2 3 3
## [281] 4 3 3 4 2 3 3 3 2 2 2 3 3 3 3 3 1 2 2 2 3 3 3 2 3 3 2 3 3 2 2 2 2 2 4
## [316] 2 3 3 2 2 1 2 4 2 3 3 2 3 3 3 2 3 3 3 3 3 2 2 3 3 3 3 4 3 3 3 2 2 3 3
## [351] 3 3 2 3 2 3 2 2 2 2 3 3 2 2 2 2 3 2 3 3 3 3 3 3 1 3 3 3 2 3 3 3 2 3 2
## [386] 3 3 2 3 3 2 2 3 3 2 3 2 3 3 3 3 2 3 2 3 3 3 3 2 2 3 3 2 3 3 3 2 3 3 3
## [421] 4 3 3 3 3 2 3 3 2 3 4 3 3 3 3 3 2 2 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3
## [456] 3 3 2 2 2 2 3 3 2 2 1 3 3 3 3 3 3 3 3 2 4 2 2 3 2 3 3 2 2 3 3 3 3 2 3
## [491] 3 2 3 3 2 2 3 2 2 3 3 2 3 3 3 3 3 3 2 2 3 3 3 2 3 3 3 2 3 3 3 3 3 4 3
## [526] 3 3 3 2 3 3 3 2 3 3 3 3 3 3 3 3 2 2 2 2 3 3 3 3 2 3 3 3 2 3 3 3 3 2 2
## [561] 3 3 2 2 2 3 3 3 3 2 3 2 3 2 4 3 2 3 3 3 3 3 2 3 3 2 2 3 3 2 3 3 3 3 3
## [596] 3 3 4 3 3
## 
## Within cluster sum of squares by cluster:
## [1] 1180.009 6396.420 5146.643 3181.195
##  (between_SS / total_SS =  24.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Plot the clusters using the same columns the model was fitted on (12:46).
# Passing the whole BS table would mix Member.id and the demographic
# columns into the projection.
fviz_cluster(k4, data = BS[, 12:46])

# Cluster centres as a data frame, relabelled with short axis names for the
# 35 clustering variables (same column order as BS[, 12:46]).
k4center <- setNames(
  as.data.frame(k4$centers),
  c(
    "No.Br", "Br.Runs", "Total.Vol", "No.Trans", "Value", "Trans.Br.Runs",
    "Vol.Tran", "Avg.Price", "PVNP", "PVP6", "PVOP", "Br.57.144", "Br.55",
    "Br.272", "Br.286", "Br.24", "Br.481", "Br.352", "Br.5", "Br.999",
    "Pt1", "Pt2", "Pt3", "Pt4", "Pt5", "Pt6", "Pt7", "Pt8", "Pt9", "Pt10",
    "Pt11", "Pt12", "Pt13", "Pt14", "Pt15"
  )
)
# One-column character matrix of cluster ids, prepended as column 1 so the
# parallel-coordinate plots can group on it.
cluster <- matrix(as.character(1:4), ncol = 1)
k4center <- cbind(cluster, k4center)

Purchase summary over the period, k = 4

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the four centre rows and distort the magnitudes.
ggparcoord(
  k4center,
  columns = 2:9,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Purchase within promotion, k = 4

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the four centre rows and distort the magnitudes.
ggparcoord(
  k4center,
  columns = 10:12,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Brandwise purchase, k = 4

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the four centre rows and distort the magnitudes.
ggparcoord(
  k4center,
  columns = 13:21,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Price categorywise purchase, k = 4

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the four centre rows and distort the magnitudes.
ggparcoord(
  k4center,
  columns = 22:25,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Selling propositionwise purchase, k = 4

# The centres are already z-scores, so draw them on their common scale.
# The default scale = "std" would re-standardise each variable across only
# the four centre rows and distort the magnitudes.
ggparcoord(
  k4center,
  columns = 26:36,
  groupColumn = 1,
  scale = "globalminmax",
  showPoints = TRUE,
  alphaLines = 0.3
)

Comparing k2, k3 and k4, I think k3 is the best segmentation. The clusters from k2 are too broad to serve the purpose of market segmentation: two clusters cannot properly characterize the data. The clusters from k4 are too narrow for market segmentation, and some of them have similar characteristics.

Features of cluster 1: Low customer loyalty, low sensitivity to discounts, fairly even purchase rates across brands, no specific requirements on the price or type of goods, few brands purchased, low volume of continuous purchases, low total purchase volume, low average volume per purchase, and low total purchase value but a high average purchase price.

Features of cluster 2: High customer loyalty, few brands purchased, high purchase volume for specific brands, high purchase volume for certain product types and a certain price level, sensitivity that differs from one kind of discount to another, few consecutive purchases, and high average purchase volume but low total value.

Features of cluster 3: Medium customer loyalty, high sensitivity to discounts (purchases increase under specific discounts), strong purchase desire, medium purchase volume for each brand, many brands purchased, high total purchase volume and total value, no specific requirements on the price or type of goods, and a tendency toward consecutive purchases.

Build the model

# Inputs for the k-centroids model: BS2 is the purchase block (columns
# 12 onwards) and BS1 is the whole table z-scored, Member.id included.
# NOTE(review): BS2 is NOT scaled, unlike the kmeans() fits above, which
# used the z-scored columns of BS -- confirm that clustering on raw volumes
# is intended here.
BS2 <- BathSoap[, -seq_len(11)]
BS1 <- scale(BathSoap)

# Seeded 3-cluster k-means fit via flexclust::kcca.
set.seed(120)
Model_k3 <- kcca(BS2, k = 3, family = kccaFamily("kmeans"))
Model_k3
## kcca object of family 'kmeans' 
## 
## call:
## kcca(x = BS2, k = 3, family = kccaFamily("kmeans"))
## 
## cluster sizes:
## 
##   1   2   3 
##  35 351 214
# Assign every household to a cluster of the fitted model and attach that
# label, as the first column, to the fully scaled table BS1.
k3_cluster <- predict(Model_k3, BS2)
cluster_data <- cbind(data.frame(cluster = k3_cluster), BS1)

# Collapse the label into a binary flag: 1 for cluster 1, 0 otherwise.
cluster_data$cluster <- as.numeric(cluster_data$cluster == 1)
head(cluster_data)
##   cluster Member.id        SEC         FEH         MT        SEX
## 1       0 -2.065230  1.3405223  0.83849915  0.4241721 -1.1385078
## 2       0 -2.065011  0.4468408 -0.04258577  0.4241721  0.4034892
## 3       0 -1.977294 -0.4468408  0.83849915  0.4241721  0.4034892
## 4       0 -1.977075  1.3405223 -1.80475562 -1.9043115 -2.6805048
## 5       0 -1.973566  1.3405223 -0.92367070  0.4241721  0.4034892
## 6       0 -1.911507  1.3405223  0.83849915  0.4241721  0.4034892
##          AGE         EDU          HS      CHILD        CS Affluence.Index
## 1  0.9089277 -0.01978688 -0.95286121  0.6299072  0.134681      -1.3163799
## 2 -1.4019055 -0.01978688 -0.08333007 -1.0133290  0.134681       0.1735308
## 3  0.9089277  0.43683338  0.78620108  0.6299072  0.134681       0.5240980
## 4  0.9089277 -1.84626790 -1.82239236  1.4515254 -1.836260      -1.4916636
## 5 -0.2464889 -0.01978688 -0.08333007 -0.1917109  0.134681      -0.6152455
## 6 -0.2464889 -0.01978688  0.35143550 -1.0133290  0.134681      -0.3523201
##   No..of.Brands Brand.Runs Total.Volume No..of..Trans      Value
## 1    -0.4030277  0.1200727   -0.5005898    -0.4104681 -0.5881031
## 2     0.8630280  0.8895639    0.2651391     0.5076339  0.3896410
## 3     0.8630280  2.0438006    1.4394712     1.8274054  0.6936645
## 4    -1.0360556 -1.1303505   -1.3403176    -1.5580955 -1.3852447
## 5    -0.4030277 -0.9379777   -0.4651989    -1.0416632 -0.8451360
## 6    -0.4030277  0.9857502    0.8056536     0.5650152  0.4168163
##   Trans...Brand.Runs   Vol.Tran  Avg..Price Pur.Vol.No.Promo....
## 1         -0.4636969 -0.3242918 -0.43944366           -0.3943558
## 2         -0.3907514 -0.2639930  0.05217678            0.1983882
## 3         -0.3523590 -0.1944886 -0.90701745            1.4444233
## 4         -0.6211057 -0.1610026 -1.13145287           -1.2708284
## 5         -0.1719147  0.8980852 -1.25970168           -0.7922274
## 6         -0.3984298  0.1135176 -0.65586353            0.9690461
##   Pur.Vol.Promo.6.. Pur.Vol.Other.Promo.. Br..Cd..57..144 Br..Cd..55
## 1        -0.5574329            -0.5116939       0.2226321 -0.1754393
## 2         0.7686897            -0.1252184      -0.4805832 -0.1578175
## 3        -0.1190296             0.7659568      -0.3756086  2.5737501
## 4        -0.5574329            -0.5116939      -0.3992184 -0.2092097
## 5         0.5452179             2.2427218      -0.4461840 -0.1474447
## 6        -0.5574329            -0.5116939      -0.1824148 -0.1214539
##   Br..Cd..272 Br..Cd..286 Br..Cd..24 Br..Cd..481 Br..Cd..352  Br..Cd..5
## 1  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
## 2  -0.3448178  -0.2253619 -0.2371511   0.4047639  -0.2549821  2.6176765
## 3  -0.3448178   0.1428499 -0.2371511  -0.2538919  -0.2549821  0.3943628
## 4  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
## 5  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
## 6  -0.3448178  -0.2253619 -0.2371511  -0.2538919  -0.2549821 -0.2929379
##   Others.999      Pr.Cat.1   Pr.Cat.2   Pr.Cat.3   Pr.Cat.4    PropCat.5
## 1 -0.3870628 -0.2794416143 -0.2520866 -0.1988988 -0.1925159 -0.246319991
## 2  0.6565013  0.3854838369  0.2625352 -0.1496659 -0.1056810  0.141979507
## 3  0.4747587 -0.0003808082  0.2150992  2.5307700 -0.3687742 -0.000177325
## 4 -1.0949914 -0.8355295851 -0.8798374 -0.2317780 -0.3687742 -0.794776958
## 5  0.1059753 -0.8355295851 -0.9096612 -0.1716428  1.7406778  0.189311544
## 6  1.6977747  0.3691393848  0.3419310 -0.1463378  1.1709563  0.540123103
##    PropCat.6  PropCat.7  PropCat.8  PropCat.9 PropCat.10 PropCat.11
## 1 -0.4980207 -0.4173594 -0.5069920 -0.4317488 -0.2850019 -0.2651424
## 2  1.6247065 -0.2567103 -0.3267324 -0.2787491 -0.2850019  0.3729320
## 3  0.7049846 -0.1518142 -0.3580118 -0.1788477 -0.2850019 -0.2651424
## 4 -0.4980207 -0.4173594 -0.5069920 -0.4317488 -0.2850019 -0.2651424
## 5 -0.4980207 -0.4173594 -0.2393435 -0.4317488 -0.2850019 -0.2651424
## 6  0.2907463 -0.4173594 -0.3897749  0.9611232 -0.2850019 -0.2651424
##   PropCat.12 PropCat.13 PropCat.14 PropCat.15
## 1  0.8905060 -0.2536688 -0.1912455  2.1061267
## 2 -0.2907978 -0.2536688 -0.1739337 -0.2505867
## 3  1.9761280 -0.2536688  2.5630697 -0.2505867
## 4 -0.2907978 -0.2536688 -0.2244217 -0.2505867
## 5 -0.2907978 -0.2536688 -0.1637435 -0.2505867
## 6 -0.2907978 -0.2536688 -0.1382100  3.9879993
# Store the 0/1 target as a factor (levels "0" and "1") so that glm() treats
# it as a two-class response, then inspect the resulting column types.
cluster_data[["cluster"]] <- factor(cluster_data[["cluster"]])
str(cluster_data)
## 'data.frame':    600 obs. of  47 variables:
##  $ cluster              : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ Member.id            : num  -2.07 -2.07 -1.98 -1.98 -1.97 ...
##  $ SEC                  : num  1.341 0.447 -0.447 1.341 1.341 ...
##  $ FEH                  : num  0.8385 -0.0426 0.8385 -1.8048 -0.9237 ...
##  $ MT                   : num  0.424 0.424 0.424 -1.904 0.424 ...
##  $ SEX                  : num  -1.139 0.403 0.403 -2.681 0.403 ...
##  $ AGE                  : num  0.909 -1.402 0.909 0.909 -0.246 ...
##  $ EDU                  : num  -0.0198 -0.0198 0.4368 -1.8463 -0.0198 ...
##  $ HS                   : num  -0.9529 -0.0833 0.7862 -1.8224 -0.0833 ...
##  $ CHILD                : num  0.63 -1.013 0.63 1.452 -0.192 ...
##  $ CS                   : num  0.135 0.135 0.135 -1.836 0.135 ...
##  $ Affluence.Index      : num  -1.316 0.174 0.524 -1.492 -0.615 ...
##  $ No..of.Brands        : num  -0.403 0.863 0.863 -1.036 -0.403 ...
##  $ Brand.Runs           : num  0.12 0.89 2.044 -1.13 -0.938 ...
##  $ Total.Volume         : num  -0.501 0.265 1.439 -1.34 -0.465 ...
##  $ No..of..Trans        : num  -0.41 0.508 1.827 -1.558 -1.042 ...
##  $ Value                : num  -0.588 0.39 0.694 -1.385 -0.845 ...
##  $ Trans...Brand.Runs   : num  -0.464 -0.391 -0.352 -0.621 -0.172 ...
##  $ Vol.Tran             : num  -0.324 -0.264 -0.194 -0.161 0.898 ...
##  $ Avg..Price           : num  -0.4394 0.0522 -0.907 -1.1315 -1.2597 ...
##  $ Pur.Vol.No.Promo.... : num  -0.394 0.198 1.444 -1.271 -0.792 ...
##  $ Pur.Vol.Promo.6..    : num  -0.557 0.769 -0.119 -0.557 0.545 ...
##  $ Pur.Vol.Other.Promo..: num  -0.512 -0.125 0.766 -0.512 2.243 ...
##  $ Br..Cd..57..144      : num  0.223 -0.481 -0.376 -0.399 -0.446 ...
##  $ Br..Cd..55           : num  -0.175 -0.158 2.574 -0.209 -0.147 ...
##  $ Br..Cd..272          : num  -0.345 -0.345 -0.345 -0.345 -0.345 ...
##  $ Br..Cd..286          : num  -0.225 -0.225 0.143 -0.225 -0.225 ...
##  $ Br..Cd..24           : num  -0.237 -0.237 -0.237 -0.237 -0.237 ...
##  $ Br..Cd..481          : num  -0.254 0.405 -0.254 -0.254 -0.254 ...
##  $ Br..Cd..352          : num  -0.255 -0.255 -0.255 -0.255 -0.255 ...
##  $ Br..Cd..5            : num  -0.293 2.618 0.394 -0.293 -0.293 ...
##  $ Others.999           : num  -0.387 0.657 0.475 -1.095 0.106 ...
##  $ Pr.Cat.1             : num  -0.279442 0.385484 -0.000381 -0.83553 -0.83553 ...
##  $ Pr.Cat.2             : num  -0.252 0.263 0.215 -0.88 -0.91 ...
##  $ Pr.Cat.3             : num  -0.199 -0.15 2.531 -0.232 -0.172 ...
##  $ Pr.Cat.4             : num  -0.193 -0.106 -0.369 -0.369 1.741 ...
##  $ PropCat.5            : num  -0.24632 0.14198 -0.000177 -0.794777 0.189312 ...
##  $ PropCat.6            : num  -0.498 1.625 0.705 -0.498 -0.498 ...
##  $ PropCat.7            : num  -0.417 -0.257 -0.152 -0.417 -0.417 ...
##  $ PropCat.8            : num  -0.507 -0.327 -0.358 -0.507 -0.239 ...
##  $ PropCat.9            : num  -0.432 -0.279 -0.179 -0.432 -0.432 ...
##  $ PropCat.10           : num  -0.285 -0.285 -0.285 -0.285 -0.285 ...
##  $ PropCat.11           : num  -0.265 0.373 -0.265 -0.265 -0.265 ...
##  $ PropCat.12           : num  0.891 -0.291 1.976 -0.291 -0.291 ...
##  $ PropCat.13           : num  -0.254 -0.254 -0.254 -0.254 -0.254 ...
##  $ PropCat.14           : num  -0.191 -0.174 2.563 -0.224 -0.164 ...
##  $ PropCat.15           : num  2.106 -0.251 -0.251 -0.251 -0.251 ...
# Logistic regression of the cluster-1 indicator on every other column of
# cluster_data.
# NOTE(review): the two warnings recorded below, together with the huge
# standard errors and near-zero residual deviance in the summary, look like
# complete separation. The predictors include the standardised versions of
# the same purchase columns the k-means labels were derived from, as well as
# Member.id. Presumably the model should be restricted to a smaller set of
# predictors (e.g. demographics only) -- confirm the intended specification.
Q3_Model <- glm(
  formula = cluster ~ .,
  family = "binomial",
  data = cluster_data
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(Q3_Model)
## 
## Call:
## glm(formula = cluster ~ ., family = "binomial", data = cluster_data)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -4.483e-05  -2.100e-08  -2.100e-08  -2.100e-08   4.701e-05  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -7.413e+01  1.842e+04  -0.004    0.997
## Member.id             -2.957e+00  2.916e+04   0.000    1.000
## SEC                    4.084e-01  3.253e+04   0.000    1.000
## FEH                    2.536e+00  3.639e+04   0.000    1.000
## MT                     5.160e+00  3.916e+04   0.000    1.000
## SEX                   -5.521e+00  7.445e+04   0.000    1.000
## AGE                    3.574e+00  3.860e+04   0.000    1.000
## EDU                    6.133e+00  4.465e+04   0.000    1.000
## HS                     1.697e-01  2.688e+04   0.000    1.000
## CHILD                  5.833e+00  2.644e+04   0.000    1.000
## CS                     1.096e-01  3.524e+04   0.000    1.000
## Affluence.Index       -3.505e-01  4.788e+04   0.000    1.000
## No..of.Brands         -6.710e+00  3.191e+04   0.000    1.000
## Brand.Runs             2.489e+00  4.892e+04   0.000    1.000
## Total.Volume          -7.538e+02  3.655e+06   0.000    1.000
## No..of..Trans          9.948e+00  5.935e+04   0.000    1.000
## Value                  4.056e+00  2.232e+05   0.000    1.000
## Trans...Brand.Runs    -1.234e+01  5.073e+04   0.000    1.000
## Vol.Tran               1.304e+01  5.939e+04   0.000    1.000
## Avg..Price            -1.290e+00  6.940e+04   0.000    1.000
## Pur.Vol.No.Promo....   1.794e+02  3.605e+06   0.000    1.000
## Pur.Vol.Promo.6..      2.121e+01  5.054e+05   0.000    1.000
## Pur.Vol.Other.Promo..  1.656e+01  3.369e+05   0.000    1.000
## Br..Cd..57..144       -2.163e+00  1.093e+06   0.000    1.000
## Br..Cd..55            -2.623e+01  1.142e+06   0.000    1.000
## Br..Cd..272           -3.952e+00  2.742e+05   0.000    1.000
## Br..Cd..286           -9.488e-01  5.200e+05   0.000    1.000
## Br..Cd..24            -3.764e+00  1.890e+05   0.000    1.000
## Br..Cd..481           -1.251e+01  3.481e+05   0.000    1.000
## Br..Cd..352            3.833e-01  4.486e+05   0.000    1.000
## Br..Cd..5              1.901e+00  1.860e+05   0.000    1.000
## Others.999            -2.287e+00  1.542e+06   0.000    1.000
## Pr.Cat.1               1.169e+02  9.200e+05   0.000    1.000
## Pr.Cat.2               2.414e+02  1.738e+06   0.000    1.000
## Pr.Cat.3               1.866e+02  1.318e+06   0.000    1.000
## Pr.Cat.4               1.156e+02  8.893e+05   0.000    1.000
## PropCat.5              2.484e+02  9.033e+05   0.000    1.000
## PropCat.6              8.864e+01  3.313e+05   0.000    1.000
## PropCat.7              1.015e+02  3.772e+05   0.000    1.000
## PropCat.8              6.041e+01  2.350e+05   0.000    1.000
## PropCat.9              3.357e+01  1.394e+05   0.000    1.000
## PropCat.10             3.255e+01  7.713e+04   0.000    1.000
## PropCat.11             6.205e+01  2.107e+05   0.000    1.000
## PropCat.12             9.660e+00  3.687e+04   0.000    1.000
## PropCat.13             4.071e+01  1.826e+05   0.000    1.000
## PropCat.14             1.605e+02  6.381e+05   0.000    1.000
## PropCat.15             4.391e+01  1.693e+05   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2.6683e+02  on 599  degrees of freedom
## Residual deviance: 2.9285e-08  on 553  degrees of freedom
## AIC: 94
## 
## Number of Fisher Scoring iterations: 25